Developing Indicators of Habitat and Ecosystem Change in the Gulf of Maine

Study area

The focus area for this project is coastal Maine, extending from the coast to the eastern boundary of the NOAA statistical areas 511, 512, 513.

usStates <- rnaturalearth::ne_states("united states of america", returnclass = "sf")
ne_us <- usStates %>% filter(name == "Maine")
statarea <- sf::st_read(paste0(gmRi::shared.path(group = "Res_Data", folder = "Shapefiles/Statistical_Areas"), "Statistical_Areas_2010_withnames.shp"), quiet = TRUE) %>% 
  filter(Id %in% c(511, 512, 513)) %>% 
  mutate(Id = as.factor(Id))
statarea_sf <- sf::st_simplify(statarea, dTolerance = .05)
mcc_turnoff_sf <- sf::st_read(here::here("Data/Shapefiles/MCC_turnoff/MCC_turnoff.shp"), quiet = TRUE)

ggplot() +  
  geom_sf(data = statarea_sf, aes(fill = Id)) + 
  geom_sf(data= ne_us, fill = "grey") +
  geom_sf(data = mcc_turnoff_sf, fill = "transparent", color = "black") +
  scale_fill_discrete(name = "Stat area") +
  theme(panel.background = element_blank(), 
        panel.grid = element_blank(), 
        axis.title = element_blank(),
        axis.ticks = element_blank())
Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Indicators

The following indicators are used in this report

  • Surface and bottom temperature anomalies (FVCOM, OISST)
  • Surface and bottom salinity anomalies (FVCOM)
  • Maine Coastal Current Index (FVCOM)
  • Species-based lobster predator index (NOAA Trawl Survey)
  • Continuous Plankton Recordings
# Indicators
FVCOM <- read_csv(here::here("Indicators/FVCOM_stat_area_temps.csv")) %>% filter(Year >= 1980)
OISST <- read_csv(here::here("Indicators/OISST_stat_area_anoms.csv")) %>% filter(Date <= as.Date("2020-12-31"))
sal <- read_csv(here::here("Indicators/FVCOM_stat_area_sal.csv")) %>% filter(Year >= 1980)
maineCC <- read_csv(here::here("Indicators/mcc_pca_pc1_2.csv")) %>% 
  rename("Year" = yr, "Month" = mon)
species_index <- read_rds(here::here("Indicators/nmfs_trawl_lobster_predator_indices.rdata")) %>% 
  rename("Year" = year)
mcc_turnoff_subset <- read_csv("/Users/mdzaugis/Documents/EcosystemIndicators/Indicators/mcc_turnoff_subset.csv")
cprData <- read_csv(here::here("Indicators/cpr_focal_pca_timeseries_period_1961-20017.csv")) %>% 
  mutate(`First Mode` = `First Mode`*-1) %>%
  rename("Year" = year,
         "FirstMode" = `First Mode`,
         "SecondMode" = `Second Mode`) %>% 
  select(Year, FirstMode, SecondMode)

Temperature

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface and bottom layer (1 m above bathymetry)
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area
  • Baseline climatology 1990-2020
yrOISST <- OISST %>% 
  mutate(Year = lubridate::year(Date)) %>% 
  group_by(Year, stat_area) %>% 
  summarise(temp_var = var(temp),
            temp = mean(temp),
            .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area))

yrFVCOM <- FVCOM %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_temp = mean(sur_temp_anom),
            bot_temp = mean(bot_temp_anom), 
            sur_temp_var = var(sur_temp_anom),
            bot_temp_var = var(bot_temp_anom),
            .groups="drop") %>% 
  mutate(stat_area = as.factor(stat_area))

tPlot1 <- yrOISST %>% 
  ggplot() +
  geom_line(aes(Year, temp, col = stat_area)) + 
  theme_bw()  +
  labs(y = "SST anomalies (deg C)")

tPlot2 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, bot_temp, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom T anomalies (deg C)")

tPlot1/tPlot2

Salinity

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface and bottom layer (1 m above bathymetry)
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area for each year
  • Baseline climatology 1990-2017 (last year of data)
yrSal <- sal %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_sal = mean(sur_sal_anom),
            bot_sal = mean(bot_sal_anom),
            sur_sal_var = var(sur_sal_anom),
            bot_sal_var = var(bot_sal_anom), .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area))

sPlot1 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, sur_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "SSS anomalies (deg C)")

sPlot2 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, bot_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom S anomalies (deg C)")

sPlot1/sPlot2

Maine Coastal Current Index

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface layer
  • Crop to Maine Coastal Current interest area
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area for each year
  • See MCC_index_report.Rmd for details
summerMCC <- maineCC %>% 
  filter(Month %in% c(6,7,8,9)) %>% 
  group_by(Year) %>% 
  summarise(MCCPC1 = mean(PC1))

yrMCC <- maineCC %>% 
  group_by(Year) %>% 
  summarise(MCCPC1 = mean(PC1))

MCCPC_plot <- yrMCC %>% 
  mutate(c = ifelse(MCCPC1 > 0, 1, 2)) %>% 
  ggplot() + geom_col(aes(x = Year, y = MCCPC1, fill = as.factor(c))) + theme_bw() + theme(legend.position = "none") + labs(x = "Year", y = "Maine Coastal Current PC1")

MCCPC_maps <- mcc_turnoff_subset %>% 
  mutate(Year = lubridate::year(as.Date(Date)),
         Month = lubridate::month(as.Date(Date))) %>% 
  filter(Year %in% c(1980, 1987, 2000, 2011, 2012),
         Month %in% c(6,7,8,9)) %>% 
  group_by(Year, lat, lon) %>% 
  summarise(u = mean(u),
            v = mean(v), .groups = "drop") %>% 
  mutate(vel = sqrt(u^2+v^2), 
         PC = if_else(Year == 2010, "negative PC1", if_else(Year ==2013, "positive PC1", "neutral PC1"))) %>% 
  ggplot() + geom_sf(data= ne_us, fill = "grey") + 
  geom_segment(aes(x = lon, y = lat, xend=lon+u, yend=lat+v, color = vel), 
               arrow = arrow(angle = 10, length = unit(0.05, "inches"), type = "closed")) + 
  scale_color_viridis_c() + theme_bw() + 
  coord_sf(datum = "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0") + 
  facet_wrap(~Year, nrow = 1)+ 
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        axis.title = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank())

MCCPC_plot / MCCPC_maps

Species Based Predator Index

Source: NOAA NE Fisheries Trawl Survey Methods:

  • Filtered to 15 lobster predators (ASMFC 2020)
  • Cropped to NOAA stat areas 511, 512, 513
  • Stratum abundance: abundance per trawl summed over stat area
  • Stratified abundance: abundance / km2 multiplied by the area of the strata
yrSpecies_index <- 
  species_index %>% 
  filter(season == "Both",
         `stratum id` != 'Strata 511-513') %>% 
  dplyr::select(Year, `stratum id`, "predators" = `stratified biomass (kg)`)

yrSpecies_index %>% 
  ggplot() +
  geom_line(aes(Year, predators, color = `stratum id` )) + theme_bw()

CPR data

Source: Continuous Plankton Recording

  • First Mode = smaller zooplankton species explains ~ 50% of variance
  • Second Mode = Calanus explains ~ 26% of variance
cprData %>% 
  pivot_longer(cols = c(FirstMode, SecondMode), 
               names_to = "Mode", values_to = "values") %>% 
  ggplot() +
  geom_line(aes(Year, values, col = Mode)) +
  theme_bw() +
  scale_color_discrete(label = c("Small spp", "Calanus"))

# area weighted
# regional time series 
# area weighted average 

Indicators PCA

allIndex <- left_join(yrFVCOM, yrSal, by = c("Year", "stat_area")) %>% 
  left_join(yrOISST, by = c("Year", "stat_area")) %>% 
  left_join(yrMCC, by = c("Year" = "Year")) %>% 
  left_join(yrSpecies_index, by = c("Year" = "Year",
                                    "stat_area" = "stratum id")) %>% 
  left_join(cprData, by = "Year") %>% 
  na.omit() %>% 
  select(-sur_temp, -sur_sal, -sur_sal_var, -bot_sal_var, -sur_temp_var, -bot_temp_var, -temp_var, -temp)

Previous analysis shows surface and bottom temperature and surface and bottom salinity load similarly in a PCA. To reduce variables only bottom temperature and salinity are used in the following PCA.

  • Data: Bottom temp, bottom salinity, MCC, Predator Index, CPR data mode one and two
  • Method: Principal components analysis
  • Years: 1990-2016

PCA Summary

For each stat area, the first two principal components have eigenvalues greater than one and were retained for further analysis. PC1 and PC2 explain 34.8% and 24.0% of the variability, respectively, in stat area 511, 30.5% and 23.2% in stat area 512, and 33.2% and 23.3% in stat area 513.

# PCA by stat area
indicator_pca <- allIndex %>% 
  nest(data = c(-stat_area),
       Year = c(Year)) %>% 
  mutate(data = map(data, ~column_to_rownames(.x, var = "Year")),
         pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
         pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
                                      "PC2" = .x$x[,2],
                                      "PC3" = .x$x[,3])))

index_pca <- indicator_pca %>% 
  unnest(c(pca_index, Year, data)) %>% 
  mutate(PC2 = if_else(stat_area == "511", PC2, PC2),
         PC1 = if_else(stat_area == "512", PC1, PC1),
         PC3 = if_else(stat_area == "513", PC3, PC3))

PCA511 <- data.frame(summary(indicator_pca$pca[[1]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "511")
PCA512 <- data.frame(summary(indicator_pca$pca[[2]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "512")
PCA513 <- data.frame(summary(indicator_pca$pca[[3]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "513")

PCA_table <- bind_rows(PCA511,PCA512,PCA513)

knitr::kable(PCA_table)
Result PC1 PC2 PC3 PC4 PC5 PC6 stat_area
Standard deviation 1.444888 1.200781 0.9993327 0.7902938 0.7644566 0.5126402 511
Proportion of Variance 0.347950 0.240310 0.1664400 0.1040900 0.0974000 0.0438000 511
Cumulative Proportion 0.347950 0.588260 0.7547100 0.8588000 0.9562000 1.0000000 511
Standard deviation 1.353820 1.178900 1.0157298 0.8498775 0.8141508 0.6004372 512
Proportion of Variance 0.305470 0.231630 0.1719500 0.1203800 0.1104700 0.0600900 512
Cumulative Proportion 0.305470 0.537110 0.7090600 0.8294400 0.9399100 1.0000000 512
Standard deviation 1.410562 1.182894 1.0209041 0.8574840 0.7567640 0.5107456 513
Proportion of Variance 0.331610 0.233210 0.1737100 0.1225500 0.0954500 0.0434800 513
Cumulative Proportion 0.331610 0.564820 0.7385300 0.8610700 0.9565200 1.0000000 513
scree1 <- fviz_eig(indicator_pca$pca[[1]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree2 <- fviz_eig(indicator_pca$pca[[2]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree3 <- fviz_eig(indicator_pca$pca[[3]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))

#scree1 / scree2 / scree3

PC time series

Below are the time series for PC1, PC2, and PC3. PC1 and PC2 time series are highly correlated between stat areas.

PC1plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC1, color = stat_area))

PC2plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC2, color = stat_area))

PC3plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC3, color = stat_area))

pc1cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC1_511, PC1_512)[[2]],
         "511_513" = corrr::correlate(PC1_511, PC1_513)[[2]],
         "512_513" = corrr::correlate(PC1_512, PC1_513)[[2]])

pc2cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC2_511, PC2_512)[[2]],
         "511_513" = corrr::correlate(PC2_511, PC2_513)[[2]],
         "512_513" = corrr::correlate(PC2_512, PC2_513)[[2]])

pc3cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC3_511, PC3_512)[[2]],
         "511_513" = corrr::correlate(PC3_511, PC1_513)[[2]],
         "512_513" = corrr::correlate(PC3_512, PC3_513)[[2]])

PC1plot / PC2plot / PC3plot

Loadings plot

Indicators load similarly for each stat area. The Maine coastal current, first mode (small zooplankton), and second mode (Calanus) tend to load together. The lobster predator index has the most variable relationship to PC1 and PC2 with respect to stat area. In stat area 512 and 513, predators loads with bottom temperature. In area 511, predators load separated from the rest of the indicators and has a stronger influence on PC2 than PC1. Bottom salinity and the plankton FirstMode (Calanus) tend to load similarly for each stat area.

Interpretation:

  • PC1: Bottom salinity and Maine Coastal Current load directly opposite and are the largest contributors to PC1
  • PC1: Contribution of bottom temperature increases from stat area 511 to 513
  • PC2: Small zooplankton and Calanus group together and load opposite to bottom temperature and predators
Figure 4. Loadings of the variables

Figure 4. Loadings of the variables

Indicator PC1_511 PC1_512 PC1_513 PC2_511 PC2_512 PC2_513 PC3_511 PC3_512 PC3_513
bot_temp 0.393 0.458 0.537 -0.475 -0.418 -0.348 0.214 0.055 0.220
bot_sal 0.610 0.612 0.546 0.013 0.103 0.232 0.199 0.038 -0.144
MCCPC1 -0.575 -0.583 -0.462 -0.083 -0.107 -0.260 -0.139 0.190 0.486
predators -0.264 0.132 0.395 -0.298 -0.306 -0.447 0.762 0.813 0.408
FirstMode 0.223 0.235 0.209 0.611 0.599 0.581 0.008 -0.056 0.146
SecondMode -0.153 -0.058 -0.009 0.552 0.593 0.469 0.560 0.543 0.712
Figure 4. Loadings of the variables

Figure 4. Loadings of the variables

Chronolgical cluster

The cluster plot groups years that are most similar based on PC1 and PC2. For this analysis stat areas were grouped together.

clusfun <- function(x){
  wss <- (nrow(x)-1)*sum(apply(x,2,var)) # get sum of squares

  for (i in 2:12) wss[i] <- sum(kmeans(x,
     centers=i)$withinss)
  
  return(wss)
}

allIndex_ca <- allIndex %>% 
  nest(data = -stat_area) %>% 
  mutate(data = map(data, ~column_to_rownames(.x, var = "Year")),
         data = map(data, ~scale(.x)),
         wss = map(data, ~clusfun(.x)))
  

# look for break in plot like a scree plot
kmeans_scree <- allIndex_ca %>% 
  unnest(wss) %>% 
  add_column("x" = rep(c(1:12),3)) %>% 
  ggplot() + 
  geom_line(aes(x = x, y = wss, col = stat_area)) +
  labs(x = "Number of Clusters",
  y ="Within groups sum of squares")

# K-Means Cluster Analysis
fit <- allIndex_ca %>% 
  mutate(fit = map(data, ~kmeans(.x, 3))) # 4 cluster solution

(fviz_cluster(object = fit$fit[[1]], data = fit$data[[1]]) + 
  theme_bw() + biplot511) /
(fviz_cluster(object = fit$fit[[2]], data = fit$data[[2]]) + 
  theme_bw() + biplot512) /
(fviz_cluster(object = fit$fit[[3]], data = fit$data[[3]]) + 
  theme_bw() + biplot513)

Breakpoint Analysis

The breakpoint analysis estimates a change in the linear relationship in the data. The location of the break indicates there may be a difference in the relationship of the variable before and after that point. We find that breakpoints change depending on the variable.

Breakpoint of indicators
# breapoint function
bp_analysis <- function(x){
  mod <-  lm(values ~ Year, data = x)
  o <- tryCatch(segmented::segmented(mod, seg.Z = ~Year, psi = c(2005)),  # need to estimate bp
                error = function(cond){cond})
}

set.seed(25)
# Breakpoint by stat area
indicator_breakpoint <- allIndex %>%
  pivot_longer(cols = c(bot_temp, bot_sal, MCCPC1, predators, FirstMode, SecondMode), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator)) %>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi)))

# get fitted values and plot over data
bp_plots <- list()
for(i in 1:length(indicator_breakpoint$data)){
  
  name_bp <- paste0(indicator_breakpoint$stat_area[[i]], "_", indicator_breakpoint$Indicator[[i]])
  
    # get the fitted data
  fitted_df <- fitted(indicator_breakpoint$seg[[i]])
  
  if(is.null(fitted_df)){
    
    "check error log"
    
  } else {
      
    seg_model <- data.frame(Year = indicator_breakpoint$data[[i]]$Year, values = fitted_df)
  
  # plot the fitted model
  
    bp_plots[[name_bp]] <- indicator_breakpoint$data[[i]] %>% 
      ggplot() + geom_line(aes(Year, values)) +
      geom_line(data = seg_model, aes(x = Year, y = values), col = "dark red")
  }
}



# extract the breakpoints
break_years <- indicator_breakpoint %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est.) %>% 
  mutate(Indicator = factor(Indicator, levels = c("MCCPC1", "predators", "bot_sal", "bot_temp", "FirstMode", "SecondMode")))

# plot break points
ggplot(break_years) +
  geom_point(aes(Est., Indicator, color = stat_area, shape = stat_area), size = 3) +
  scale_color_grey(name = "Stat area") +
  scale_shape(name = "Stat area") +
  labs(x = "Estimated breakpoint")

Breakpoint of PC1 and PC2
set.seed(8)
# Breakpoint by stat area
pc1_breakpoint <- index_pca %>%
  select(stat_area, Year, PC1, PC2)  %>%
  pivot_longer(cols = c(PC1, PC2), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator))%>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi))) %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est., St.Err)

knitr::kable(pc1_breakpoint)
stat_area Indicator Est. St.Err
511 PC1 2004.646 1.557871
511 PC2 1994.000 5.878710
512 PC1 2004.729 1.780807
512 PC2 2009.000 9.367909
513 PC1 1997.193 2.662064
513 PC2 1998.000 4.618772

Lobster Data

ALSI <- read_csv(here::here("Biological_data/SettlementIndex.csv"))
ME_landings <- read_csv(here::here("Biological_data/ME_lob_landings_1950_2019.csv")) %>% 
  add_column("stat_area" = "State Wide")
ASFMCindicies <- read_csv(here::here("Biological_data/GOMGBK_indices.csv")) %>% 
  rename("lob_index" = Index, "name" = Survey)
GOMindex <- ASFMCindicies %>% 
  filter(name %in% c("MeFQ2", "MeMQ2")) %>% 
  add_column("stat_area" = "State Wide")
NEFSCindex <- ASFMCindicies %>% 
  filter(name %in% c("NefscFQ2", "NefscMQ2")) %>% 
  add_column("stat_area" = "State Wide")
sublegal <- readxl::read_excel(here::here("Biological_data/RawData.xlsx")) %>% 
  rename("Year" = year, "Month" = month, "stat_area" = `stat area`)
MEDMR_trawl <- read_csv(paste0(gmRi::box_path("Res_Data", "Maine_NH_Trawl"), "MaineDMR_Trawl_Survey_Catch_Data_2021-05-14.csv")) %>% 
  filter(Common_Name == "Lobster American")
NEFSC_meIndex <- read_csv(here::here("Biological_data/nmfs_trawl_lobster_indices.csv")) %>% 
  rename("Year" = est_year, "stat_area" = lobster_strata) %>% 
  mutate(stat_area = as.factor(stat_area))

ALSI index

Source: American Lobster Settlement Index data portal Sites:

  • Jonesport, Length: 2002-2018, stat area: 511
  • Mt. Desert Island, Length: 2000-2018, stat area: 512
  • Outer Penobscot Bay, Length: 2000-2018, stat area: 512
  • Mid-coast, Length: 1989-2018, stat area: 513
  • Casco Bay, Length: 2000-2018, stat area: 513
  • York, Length: 2000-2018, stat area: 513

Methods:

  • Sites grouped by NOAA stat area
  • Time series cropped to shortest length
  • Averaged by stat area
statKey <- data.frame("Location" = c('Jonesport',
'Mt. Desert Island',
'Outer Pen Bay',
'Mid-coast',
'Casco Bay',
'York'), "stat_area" = c(511,512,512,513,513,513))

ALSI <- ALSI %>% 
  left_join(statKey, by = "Location") %>% 
  filter(!is.na(stat_area),
         Year >= 2000) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = mean(Yoy_density), .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area),
         name = "ALSI") %>% 
  na.omit()

ALSI_cors <- ALSI %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])

ALSI_plot <- ALSI %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "ALSI yoy density")

bp_analysis <- function(x, psi){
  mod <-  lm(lob_index ~ Year, data = x)
  o <- tryCatch(segmented::segmented(mod, seg.Z = ~Year, psi = psi),  # need to estimate bp
                error = function(cond){cond})
}

ALSI_bp <- ALSI %>% 
  nest(data = -c(stat_area, name)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(2005))),
         bp1 = purrr::map(mod, ~.x$psi[[2]]))

ALSI_plot

Sublegal index

Source: Ventless trap survey

Calculate stratified means

  • Calculate catch per unit effort for each site for each year
  • Multiply cpue by the depth strata area factor
  • Group by stat area, sum the outputs
strata_area <- tibble("stat_area" = c(511,512,513),
                                 "1" = c(122, 566, 315),
                                 "2" = c(82, 395, 338),
                                 "3" = c(92, 420, 198)) %>% 
  pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_area") %>% 
  group_by(stat_area) %>% 
  mutate(`depth stratum` = as.numeric(`depth stratum`),
         total = sum(strata_area)) 

# average number of lobsters per trap haul at each site
u <- sublegal  %>% 
  group_by(Year, `site ID`, `effort ID`, stat_area, `depth stratum`) %>% 
  summarise(n_lob = sum(`sample ID` != 0), .groups = "drop") %>% 
  group_by(Year, `site ID`, stat_area, `depth stratum`) %>% 
  summarise(n_lob_u = mean(n_lob), .groups = "drop")

# average number of lobsters per trap haul at each depth stratum within a stat area
v <- sublegal  %>% 
  group_by(stat_area, `depth stratum`, Year, `site ID`, `effort ID`) %>% 
  summarise(n_lob = sum(`sample ID` != 0), .groups = "drop") %>% 
  group_by(stat_area, `depth stratum`, Year) %>% 
  summarise(n_lob_v = mean(n_lob), .groups = "drop")

# choose the relevant v which corresponds to the depth strata that u is in ("stat_area", "depth stratum", "Year"). Each site will have one w value
w <- left_join(v, u, by = c("stat_area", "depth stratum", "Year")) %>% 
  mutate(w = (n_lob_v-n_lob_u)^2)

#sum of all w within the same depth strata in a stat area
x <- w %>% 
  group_by(`depth stratum`, stat_area, Year) %>% 
  summarise(x = sum(w), .groups = "drop")

#number of sites per depth stratum within a given stat area
y <- sublegal %>% 
  group_by(stat_area, `depth stratum`, Year) %>% 
  summarise(site_ID = unique(`site ID`)) %>% 
  summarise(y = n(), .groups = "drop")

#if done correctly, each stat area per year should have three z values, one for each depth stratum
z <- left_join(x, y, by = c("stat_area", "depth stratum", "Year")) %>% 
  mutate(z = x/(y-1))

# Calculate stat area variance 
sublegal_vari <- left_join(z, strata_area, by = c("stat_area", "depth stratum")) %>% 
  mutate(a = 1/(total^2),
         b = strata_area*(strata_area-y)*(z/y)) %>% 
  group_by(stat_area, Year, a) %>% 
  summarise(stat_sum = sum(b), .groups = "drop") %>% 
  mutate(vari = a*stat_sum,
         sd = sqrt(vari))
strata_area_factor <- tibble("stat_area" = c(511,512,513),
                                 "1" = c(0.412162162, 0.409847936, 0.370152761),
                                 "2" = c(0.277027027, 0.28602462, 0.397179788),
                                 "3" = c(0.310810811, 0.304127444, 0.23266745)) %>% 
  pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_scale") %>% 
  mutate(`depth stratum` = as.numeric(`depth stratum`))

sublegal_cpue <- sublegal %>% 
  group_by(`trip ID`, `trip date`, Year, Month, `site ID`, 
           `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`, `effort ID`) %>% 
  mutate(lob_count = sum(`sample ID` != 0), 
            lob_effort = lob_count/`soak nights`) %>% 
  group_by(`trip ID`, `trip date`, Year, Month, 
           `site ID`, `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`) %>% 
  summarise(cpue = sum(lob_effort)/sum(!is.na(unique(`effort ID`))), .groups = "drop") %>% 
  left_join(strata_area_factor, by = c("stat_area", "depth stratum")) %>% 
  mutate(stratified_cpue = cpue*strata_scale,
         stat_area = as.factor(stat_area)) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = sum(stratified_cpue), .groups = "drop") %>% 
  mutate(name = "sublegal_cpue")

sub_leagal_cors <- sublegal_cpue %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])
            
sublegal_plot <- sublegal_cpue %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "Sublegal lobster cpue")

sublegal_var_plot <- sublegal_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "Sublegal lobster variance")


sublegal_bp <- sublegal_cpue %>% 
  nest(data = -c(stat_area, name)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(2016))),
         bp1 = purrr::map(mod, ~.x$psi[[2]]))


sublegal_plot

ME-NH Trawl Survey

Source: ME-NH Trawl Survey

Calculate stratified means

  • Calculate catch per unit effort for each site for each year
  • Multiply cpue by the depth strata area factor
  • Group by stat area, sum the outputs
stat_area_trawl_key <- tibble("stat_area" = c(511, 512, 512, 513, 513),
                              "Region" = c(1,2,3,4,5))

DMR_strata_area <- tibble("Stratum" = c("1", "2", "3", "4"),
                          "1" = c(253.27, 214.22, 227.35, 225.65),
                          "2" = c(279.63, 191.23, 211.66, 263.49),
                          "3" = c(259.62, 262.90, 280.03, 183.69),
                          "4" = c(205.30, 206.12, 310.49, 170.72),
                          "5" = c(138.54, 220.49, 365.04, 196.11)) %>% 
  pivot_longer(cols = c("1", "2", "3", "4", "5"), names_to = "Region", values_to = "strata_area") %>% 
  group_by(Region) %>% 
  mutate(Stratum = as.numeric(Stratum),
         total = sum(strata_area),
         Region = as.numeric(Region)) %>% 
  left_join(stat_area_trawl_key) %>% 
  group_by(stat_area, Stratum) %>% 
  summarise(strata_area = sum(strata_area),
            total = sum(total))

# average number of lobsters per tow 
u <- MEDMR_trawl %>% 
  left_join(stat_area_trawl_key) %>% 
  group_by(Season, Year, Tow_Number, stat_area, Stratum) %>% 
  summarise(n_lob_u = sum(Expanded_Weight_kg, na.rm = TRUE), .groups = "drop")

# average number of lobsters per trap haul at each depth stratum within a stat area
v <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>%  
  group_by(stat_area, Stratum, Year, Season) %>% 
  summarise(n_lob_v = mean(Expanded_Weight_kg, na.rm = TRUE), .groups = "drop")

# choose the relevant v which corresponds to the depth strata that u is in ("stat_area", "depth stratum", "Year"). Each site will have one w value
w <- left_join(v, u, by = c("stat_area", "Stratum", "Year", "Season")) %>% 
  mutate(w = (n_lob_v-n_lob_u)^2)

#sum of all w within the same depth strata in a stat area
x <- w %>% 
  group_by(Stratum, stat_area, Year, Season) %>% 
  summarise(x = sum(w), .groups = "drop")

#number of sites per depth stratum within a given stat area
y <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>% 
  group_by(stat_area, Stratum, Year, Season) %>% 
  summarise(Tow_number = unique(Tow_Number, na.rm = TRUE)) %>% 
  summarise(y = n(), .groups = "drop")

#if done correctly, each stat area per year should have three z values, one for each depth stratum
z <- left_join(x, y, by = c("stat_area", "Stratum", "Year", "Season")) %>% 
  mutate(z = x/(y-1))

# Calculate stat area variance 
MEDMR_vari <- left_join(z, DMR_strata_area, by = c("stat_area", "Stratum")) %>% 
  mutate(a = 1/(total^2),
         b = strata_area*(strata_area-y)*(z/y)) %>% 
  group_by(stat_area, Year, a, Season) %>% 
  summarise(stat_sum = sum(b), .groups = "drop") %>% 
  mutate(vari = a*stat_sum,
         sd = sqrt(vari))

medmr_vari_plot <- MEDMR_vari %>% 
  ggplot() + 
  geom_line(aes(Year, vari, col = as.factor(stat_area))) +
  facet_wrap(~Season)
MEDMR_cpue <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>%  
  group_by(Year, Season, stat_area, Stratum)%>%
  mutate(tows=n_distinct(Tow_Number))%>%
  group_by(Year, Season, tows, stat_area, Stratum) %>%
  summarise(weight = sum(Expanded_Weight_kg,na.rm=TRUE), 
            catch=sum(Expanded_Catch,na.rm=TRUE))%>%
  mutate(weight_tow = weight/tows, 
         catch_tow = catch/tows) %>% 
  left_join(DMR_strata_area) %>% 
  mutate(stratified_wpue = weight_tow*(strata_area/total),
         stratified_cpue = catch_tow*(strata_area/total)) %>% 
  group_by(stat_area, Year, Season) %>% 
  summarise(lob_cpue = sum(stratified_cpue), 
            lob_index = sum(stratified_wpue),
            .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area),
         name = "ME-NH_trawl")

MEDMR_cors <- MEDMR_cpue %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, Season) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])
            

MEDMR_cpue %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "ME-NH Trawl lobster cpue") +
  facet_wrap(~Season)

MEDMR_bp <- MEDMR_cpue %>% 
  nest(data = -c(stat_area, Season, name)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(2015))),
         bp1 = purrr::map(mod, ~.x$psi[[2]]))

ME yearly landings

Yearly Maine lobster landings in pounds from 1950-2020.

ME_landings_cors <- ME_landings %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area) %>% 
  summarise(corPC1 = corrr::correlate(Pounds, PC1)[[2]],
            corPC2 = corrr::correlate(Pounds, PC2)[[2]],
            corPC3 = corrr::correlate(Pounds, PC3)[[2]]) 

ME_pounds <- ME_landings %>% 
  mutate(name = "ME_landings") %>% 
  select(Year, "lob_index" = Pounds, name, stat_area) 

ME_pounds %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  labs(y = "Maine lobster landings (pounds)")

ME_pounds_bp <- ME_pounds %>% 
  nest(data = -c(stat_area, name)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(1990, 2015))),
         bp1 = purrr::map(mod, ~.x$psi[[2]]))

Nefsc index

ASFMC lobster abundance index based on the NE fisheries trawl survey. Time spans 1978-2018 and indices are split into seasons and sex.

NEFSC_cors <- NEFSCindex %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")

NEFSCindex %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  facet_wrap(~name) +
  labs(y = "Nefsc trawl lob abundance index")

NEFSCindex_bp <- NEFSCindex %>% 
  nest(data = -c(stat_area, name)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(1990, 2015))),
         bp1 = purrr::map(mod, ~.x$psi[[2]]))

NMFS stat area 511-513

Lobster biomass and abundance for statistical areas 511-513

NEFSC_meIndex <- NEFSC_meIndex %>% rename("lob_index" = `stratified biomass (kg)`)

NEFSC_meIndex_cors <- NEFSC_meIndex %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")

nmfsBIO <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "Lobster biomass") +
  scale_color_discrete(name = "Stat area")

nmfsBIO_var <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, bio_var, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "variance") +
  scale_color_discrete(name = "Stat area")

nmfsNUM <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, `stratified abundance`, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "Lobster abundance") +
  scale_color_discrete(name = "Stat area")

nmfsNUM_var <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, abun_var, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "variance") +
  scale_color_discrete(name = "Stat area")

nmfsBIO / nmfsNUM 

NEFSC_meIndex_bp <- NEFSC_meIndex %>% 
  nest(data = -c(stat_area, season)) %>% 
  mutate(mod = map(data, ~bp_analysis(.x, c(1990, 2015))),
         bp1 = purrr::map(mod, ~.x$psi[[1,2]]),
         bp2 = purrr::map(mod, ~.x$psi[[2,2]]))

Biological data analysis

xyPlot <- function(x){
  
  if(is.character(x$stat_area)){
      x <- select(x, -stat_area)
    }
  
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    ggplot() +
    geom_point(aes(PC1, lob_index, color = stat_area)) + 
    geom_smooth(aes(PC1, lob_index, color = stat_area), method = "gam", se = FALSE) +
    facet_wrap(~name)
}

stepwiseAIC <- function(x){
  
    if(is.character(x$stat_area)){
      x <- select(x, -stat_area)
    }
    x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1 + PC2 + PC3, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}

aicModel <- function(x){
  
      if(is.character(x$stat_area)){
      x <- select(x, -stat_area)
    }
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1 + PC2 + PC3, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}


aicModelGAM <- function(x, k){
  
      if(is.character(x$stat_area)){
      x <- select(x, -stat_area)
      }
  
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(gams = purrr::map(data, ~mgcv::gam(lob_index ~ s(PC1, k=k) + s(PC2, k=k) + s(PC3, k=k), 
                                              data = .x, select = TRUE, method="REML")),
           tidied = purrr::map(gams, broom::tidy)) %>% 
    select(stat_area, name, tidied) %>% 
    unnest(c(tidied))
}

           
aicModel_indicators <- function(x){
  
      if(is.character(x$stat_area)){
      x <- select(x, -stat_area)
      }
  
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, bot_temp, bot_sal, MCCPC1, predators, FirstMode, SecondMode, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ bot_temp + bot_sal + MCCPC1 + predators + FirstMode + SecondMode, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}



piecewise_reg <- function(x, bps){
  
  x <- x %>% unnest(c(data, bp1)) %>% 
    select(stat_area, name, Year, lob_index, bp1)
  
  if(is.character(x$stat_area)){
    x <- select(x, -stat_area)
  }
  
  x <- x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name, bp1, Year) %>% 
    mutate(kd = if_else(Year>bp1, 1, 0)) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1*(DK) + 
                                                               PC2*(DK) + 
                                                               PC3*(DK), data = .x), 
                                                 direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]]))
  
  x <- x %>% 
    unnest(data)
  
}


##################################################

ALSI index

The ALSI index is not significantly correlated with PC1 or PC

Correlation table

ALSI_cors %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 ALSI -0.4194353 0.3072101 0.1653596
512 ALSI -0.2646401 0.3738632 0.1615255
513 ALSI -0.4993881 0.6493318 0.5116346

Scatter plot

xyPlot(ALSI)

AIC lm model selection table

aicSel <- aicModel(ALSI)
aicSelGAM <- aicModelGAM(ALSI, 5)
aicSel2 <- aicModel_indicators(ALSI)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.1759260 0.1125357 0.3832675 2.775282 0.1196292 1 -5.825488 17.65098 19.77513 1.909621 13 15 lob_index ~ PC1 ALSI
512 0.1397737 0.0824252 0.3313509 2.437271 0.1393292 1 -4.280251 14.56050 17.06014 1.646901 15 17 lob_index ~ PC2 ALSI
513 0.6492657 0.5991608 0.3174624 12.958130 0.0006529 2 -2.965900 13.93180 17.26465 1.410953 14 17 lob_index ~ PC2 + PC3 ALSI
knitr::kable(aicSelGAM)
stat_area name term edf ref.df statistic p.value
511 ALSI s(PC1) 1.4882941 4 1.4041080 0.0498928
511 ALSI s(PC2) 0.0000041 4 0.0000002 0.7939962
511 ALSI s(PC3) 0.0877091 4 0.0240327 0.3139331
512 ALSI s(PC1) 0.0000062 4 0.0000010 0.4875469
512 ALSI s(PC2) 0.5897069 4 0.3593165 0.1385407
512 ALSI s(PC3) 0.0000041 4 0.0000005 0.5923790
513 ALSI s(PC1) 0.0000028 4 0.0000004 0.5527103
513 ALSI s(PC2) 0.9359963 4 3.6557366 0.0012759
513 ALSI s(PC3) 0.8910265 4 2.0441191 0.0086693

Subleagal index

Correlation table

sub_leagal_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 sublegal_cpue 0.4932694 -0.5993328 0.5726005
512 sublegal_cpue 0.5766013 -0.6320931 0.0502201
513 sublegal_cpue 0.4611801 -0.5554480 -0.3723648

Scatter plot

xyPlot(sublegal_cpue)

AIC lm model selection table

aicSelGAM <- aicModelGAM(sublegal_cpue, 4)
aicSel <- aicModel(sublegal_cpue)
aicSel2 <- aicModel_indicators(sublegal_cpue)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.5910509 0.4888137 984.1015 5.781169 0.0279690 2 -89.66585 187.3317 188.9233 7747646 8 11 lob_index ~ PC2 + PC3 sublegal_cpue
512 0.3995417 0.3328241 3207.2477 5.988552 0.0369289 1 -103.30949 212.6190 213.8127 92577938 9 11 lob_index ~ PC2 sublegal_cpue
513 0.3085225 0.2316917 1464.6883 4.015608 0.0760731 1 -94.68801 195.3760 196.5697 19307808 9 11 lob_index ~ PC2 sublegal_cpue
knitr::kable(aicSelGAM)
stat_area name term edf ref.df statistic p.value
511 sublegal_cpue s(PC1) 0.0000503 3 0.0000070 0.6157636
511 sublegal_cpue s(PC2) 0.8202627 3 1.5162891 0.0440829
511 sublegal_cpue s(PC3) 0.7952929 3 1.2948368 0.0557396
512 sublegal_cpue s(PC1) 0.8877120 3 0.5732628 0.1778300
512 sublegal_cpue s(PC2) 0.6330516 3 0.5747109 0.1049775
512 sublegal_cpue s(PC3) 0.0000518 3 0.0000023 0.9114541
513 sublegal_cpue s(PC1) 0.0001354 3 0.0000317 0.4395255
513 sublegal_cpue s(PC2) 0.8145738 3 1.4640482 0.0414786
513 sublegal_cpue s(PC3) 0.8303740 3 0.4817565 0.2221681

ME yearly landings

Correlation table

ME_landings_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area corPC1 corPC2 corPC3

Scatter plot

xyPlot(ME_pounds)

AIC lm model selection table

aicSelGAM <- aicModelGAM(ME_pounds, 5)
aicSel <- aicModel(ME_pounds)
aicSel2 <- aicModel_indicators(ME_pounds)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.2484089 0.2028579 32420331 5.453427 0.0089887 2 -672.1102 1352.220 1358.555 3.468557e+16 33 36 lob_index ~ PC1 + PC2 ME_landings
512 0.1596119 0.1348946 33774123 6.457500 0.0157796 1 -674.1203 1354.241 1358.991 3.878351e+16 34 36 lob_index ~ PC2 ME_landings
513 0.3718964 0.3338295 29637548 9.769553 0.0004651 2 -668.8795 1345.759 1352.093 2.898668e+16 33 36 lob_index ~ PC1 + PC2 ME_landings
knitr::kable(aicSelGAM)
stat_area name term edf ref.df statistic p.value
511 ME_landings s(PC1) 1.1419314 4 0.6751062 0.1046332
511 ME_landings s(PC2) 1.5806096 4 2.1875071 0.0071152
511 ME_landings s(PC3) 0.0008035 4 0.0001255 0.4743764
512 ME_landings s(PC1) 0.9048502 4 0.3531980 0.2155047
512 ME_landings s(PC2) 0.8566724 4 1.4797510 0.0125743
512 ME_landings s(PC3) 0.0003620 4 0.0000200 0.7621572
513 ME_landings s(PC1) 0.8235815 4 1.1663465 0.0192908
513 ME_landings s(PC2) 1.7420407 4 4.1600693 0.0003216
513 ME_landings s(PC3) 0.6312029 4 0.2467791 0.2130456

ME Trawl index

Correlation table

MEDMR_cors %>% 
  na.omit() %>% 
  knitr::kable()
stat_area Season corPC1 corPC2 corPC3
511 Fall 0.6682315 -0.4816462 0.0686523
511 Spring 0.6491385 -0.6148421 0.0777621
512 Fall 0.2990573 -0.4904639 -0.3677374
512 Spring 0.8260627 -0.7212692 -0.1826128
513 Fall 0.2835740 -0.1419138 -0.6338938
513 Spring 0.3547642 -0.2634394 -0.6122392

Scatter plot

xyPlot(MEDMR_cpue)

AIC lm model selection table

aicSel <- aicModel(MEDMR_cpue)
aicSelGAM <- aicModelGAM(MEDMR_cpue, 5)
aicSel2 <- aicModel_indicators(MEDMR_cpue)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.2520418 0.2279141 11.30559 10.446168 0.0029114 1 -125.8282 257.6564 262.1459 3962.305 31 33 lob_index ~ PC1 ME-NH_trawl
512 0.3246583 0.2796355 15.32223 7.210977 0.0027724 2 -135.3194 278.6388 284.6249 7043.123 30 33 lob_index ~ PC2 + PC3 ME-NH_trawl
513 0.3834734 0.3635855 22.56166 19.281692 0.0001219 1 -148.6297 303.2594 307.7489 15779.887 31 33 lob_index ~ PC3 ME-NH_trawl
knitr::kable(aicSelGAM)
stat_area name term edf ref.df statistic p.value
511 ME-NH_trawl s(PC1) 1.5546299 4 1.9841589 0.0078687
511 ME-NH_trawl s(PC2) 0.3831062 4 0.1549088 0.1922815
511 ME-NH_trawl s(PC3) 0.0000867 4 0.0000094 0.5528045
512 ME-NH_trawl s(PC1) 0.2418640 4 0.0703663 0.3080260
512 ME-NH_trawl s(PC2) 0.9026573 4 2.3138608 0.0022853
512 ME-NH_trawl s(PC3) 0.5777374 4 0.3419736 0.1333209
513 ME-NH_trawl s(PC1) 0.8969429 4 0.3981576 0.1538907
513 ME-NH_trawl s(PC2) 1.3321819 4 0.9360308 0.0568851
513 ME-NH_trawl s(PC3) 0.9534783 4 5.1131060 0.0000568

NEFSC trawl index

Correlation table

NEFSC_cors %>% 
  filter(name %in% c("NefscFQ2", "NefscFQ4", "NefscMQ2", "NefscMQ4")) %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3

Scatter plot

xyPlot(NEFSCindex)

AIC lm model selection table

aicSelGAM <- aicModelGAM(NEFSCindex, 5)
aicSel <- aicModel(NEFSCindex)
aicSel2 <- aicModel_indicators(NEFSCindex)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.3144294 0.2728796 1.966978 7.567543 0.0019718 2 -73.86951 155.7390 162.0731 127.67701 33 36 lob_index ~ PC1 + PC2 NefscFQ2
512 0.2476817 0.2020866 2.060507 5.432205 0.0091333 2 -75.54186 159.0837 165.4178 140.10775 33 36 lob_index ~ PC1 + PC2 NefscFQ2
513 0.3387701 0.2986955 1.931744 8.453499 0.0010860 2 -73.21882 154.4376 160.7717 123.14393 33 36 lob_index ~ PC1 + PC2 NefscFQ2
511 0.3780106 0.3403142 1.255375 10.027782 0.0003957 2 -57.70321 123.4064 129.7405 52.00687 33 36 lob_index ~ PC1 + PC2 NefscMQ2
512 0.3079663 0.2660249 1.324175 7.342770 0.0023020 2 -59.62402 127.2480 133.5821 57.86354 33 36 lob_index ~ PC1 + PC2 NefscMQ2
513 0.4109079 0.3752053 1.221725 11.509201 0.0001614 2 -56.72508 121.4502 127.7842 49.25621 33 36 lob_index ~ PC1 + PC2 NefscMQ2
knitr::kable(aicSelGAM)
stat_area name term edf ref.df statistic p.value
511 NefscFQ2 s(PC1) 1.2876032 4 1.4506571 0.0191078
511 NefscFQ2 s(PC2) 1.5880929 4 2.2550748 0.0063438
511 NefscFQ2 s(PC3) 0.0001103 4 0.0000148 0.5213610
512 NefscFQ2 s(PC1) 0.7669306 4 0.8215864 0.0460963
512 NefscFQ2 s(PC2) 0.8480125 4 1.3946331 0.0148886
512 NefscFQ2 s(PC3) 0.0000609 4 0.0000080 0.5078377
513 NefscFQ2 s(PC1) 0.8374938 4 1.2882553 0.0182335
513 NefscFQ2 s(PC2) 0.9070660 4 2.4384850 0.0023881
513 NefscFQ2 s(PC3) 0.0000246 4 0.0000039 0.4821666
511 NefscMQ2 s(PC1) 1.4701779 4 2.0387306 0.0078542
511 NefscMQ2 s(PC2) 0.9197827 4 2.8571796 0.0011273
511 NefscMQ2 s(PC3) 0.0000296 4 0.0000039 0.5180040
512 NefscMQ2 s(PC1) 0.8101715 4 1.0669234 0.0280692
512 NefscMQ2 s(PC2) 0.8951363 4 2.1314256 0.0040081
512 NefscMQ2 s(PC3) 0.1950601 4 0.0605743 0.2730047
513 NefscMQ2 s(PC1) 0.8765322 4 1.7747800 0.0074476
513 NefscMQ2 s(PC2) 0.9330253 4 3.4797996 0.0004663
513 NefscMQ2 s(PC3) 0.0000028 4 0.0000002 0.6414642

Cluster Analysis

GOMindex_ca <- MEDMR_cpue %>% 
  filter(Season == "Spring") %>% 
  select(Year, lob_index, name, stat_area)

NEFSCindex_ca <- NEFSCindex %>% 
  filter(Season == 2) %>% 
  select(Year, lob_index, name)

lobData <- bind_rows(ALSI, sublegal_cpue, ME_pounds, GOMindex_ca, NEFSCindex_ca) %>% 
  pivot_wider(names_from = c(name, stat_area),
              values_from = lob_index) %>% 
  na.omit() %>% 
  column_to_rownames("Year") 

# standardize variables
lobData <- scale(lobData) %>% 
  data.frame()

lobData_t <- data.table::transpose(lobData)

# get row and colnames in order
colnames(lobData_t) <- rownames(lobData)
rownames(lobData_t) <- colnames(lobData)



# determine number of clusters
wss <- (nrow(lobData)-1)*sum(apply(lobData,2,var)) # get sum of squares

for (i in 2:12) wss[i] <- sum(kmeans(lobData,
   centers=i)$withinss)

# look for break in plot like a scree plot
breakplot <- plot(1:12, wss, type="b", xlab="Number of Clusters",
  ylab="Within groups sum of squares")

breakinfo <- fpc::pamk(lobData)

# K-Means Cluster Analysis
fit <- kmeans(lobData, 2) # 3 cluster solution
# get cluster means
cluster_means <- aggregate(lobData,by=list(fit$cluster),FUN=mean)
# append cluster assignment
lobData_cluster <- data.frame(lobData, fit$cluster)

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
cluster::clusplot(lobData, fit$cluster, color=TRUE, shade=TRUE,
   labels=2, lines=0)

# Centroid Plot against 1st 2 discriminant functions
centroidPlot <- fpc::plotcluster(lobData, fit$cluster)

Variance

Variance shows the spread in the data. We are using this stat to see if these indicators and the response variables are becoming more variable with time.

p0 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, bot_temp_var)) +
  facet_wrap(~stat_area) +
  labs(y = "Bottom temp variance")

p1 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, sur_temp_var)) +
  facet_wrap(~stat_area) +
  labs(y = "SST variance (FVCOM)")

p2 <- yrOISST %>% 
  ggplot() +
  geom_line(aes(Year, temp_var)) +
  facet_wrap(~stat_area)  +
  labs(y = "SST variance (OISST)")
  

p3 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, bot_sal_var)) +
  facet_wrap(~stat_area) +
  labs(y = "Bottom salinity variance")

p4 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, sur_sal_var)) +
  facet_wrap(~stat_area) +
  labs(y = "SSS variance")

p5 <- sublegal_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari)) +
  facet_wrap(~stat_area) +
  labs("VTS variance")

p6 <- MEDMR_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = Season)) +
  facet_wrap(~stat_area) +
  labs("ME-NH lob biomass var")

p6_1 <- MEDMR_vari %>% 
  mutate(vari = if_else(vari > 1000, 500, vari)) %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = Season)) +
  facet_wrap(~stat_area) +
  labs("ME-NH lob biomass var")

p7 <- NEFSC_meIndex %>% 
  filter(Year >= 1980) %>% 
  ggplot() +
  geom_line(aes(Year, bio_var, col = season)) +
  facet_wrap(~stat_area) +
  labs("NEFSC lob biomass var")

Temperature

p0 / p1 / p2

Salinity

p3 / p4

Sublegal Lobsters

p5

ME-NH trawl survey

p6

p6_1

Nefsc trawl survey

p7